MacTech 1 to 12

home *** CD-ROM | disk | FTP | other *** search

/ MacTech 1 to 12 / MacTech-vol-1-12.toast / Source / MacTech® Magazine / Volume 06 - 1990 / 06.11 Nov 90 / NeuralNet Estimator⁄EDIT / Gradient methods.c < prev next >

Wrap

C/C++ Source or Header | 1989-08-03 | 14.2 KB | 436 lines | [TEXT/KAHL]

#include "Neural Network.h" #include <math.h> extern FILE * Jac; extern NeuralNet * theNet; extern DTypeVector yData; extern DTypeMatrix XData; extern DTypeVector Alpha[]; extern DTypeMatrix Jac_T; extern DTypeVector Pi; extern DTypeVector Diag; extern DTypeVector Resid; extern DTypeVector dSquash[]; extern DTypeVector gradSS; extern DTypeMatrix Phi, T2; static unsigned int totparms; /* total parms in model, set by AllotGradientWorkSpace() */ /*------------------------- Find parameter values that minimize sum of squares, uses line search method. Expects: 1. tolerance values gradtol, steptol; which are stored in NeuralNet structure. 2. validly defined and alloted NeuralNet structure. 3. input data in matrix XData and output data values in vector yData. Returns termcode = FAIL if current parm value is not an approximate critical point = METGRADTOL if norm of scaled gradient less than gradtol = METSTEPTOL if scaled step is less than steptol = LINEFAIL if linesearch failed to find next parm distinct from current value = EXCEDITNLIM if iteratation limit exceeded = SINGJAC if Jacobian is singular */ do_GaussNewton() { int itncount = 0; /* start iteration count at zero */ int termcode = FAIL;/* termcode must be changed to nonzero in order to terminate */ int retcode; int sing; /* flag for singularity of Jacobian */ double SS = 0; while(termcode == FAIL) { itncount += 1; /* increment iteration count */ SS = Compute_SS_Jac(); /* this also computes the Jacobian, stored as transpose */ Matrix_by_Vec(&Jac_T, &Resid, &gradSS); /* Compute the gradient of the SS function = Jac_T*Resid. should be before next step calculation since that computation affects the values in Resid */ sing = OLSbyQRmethod(&Jac_T,&Pi,&Diag,&Resid); /* Compute next step, step increment stored in Diag sing TRUE => singular Jacobian, so stop */ if(sing) termcode = SINGJAC; else { SaveParms(&Pi); /* save weights in case linesearch fails,the Pi vector is not being used at this point, so is reused to save space */ if(theNet->usemaxstep == TRUE) ConstrainStep(); /* constrain step size to maxstep */ retcode = LineSearch(&SS); /* also sets the SS value */ termcode = StopYet(SS,retcode,itncount); } }/* end of while(termcode == FAIL) */ return(termcode); } /*------------------------- Find parameter values that minimize sum of squares, uses quasi Gauss-Newton method combined with line search. Adds a nonsingular diagonal matrix to Jacobian to overcome singularity problems. Takes more memory than Gauss-Newton. Similar to Levenberg- Marquardt but trust region is a constant since only trying to fix singularity of Jacobian problem. Expects: 1. tolerance values gradtol, steptol; which are stored in NeuralNet structure. 2. validly defined and alloted NeuralNet structure. 3. input data in matrix XData and output data values in vector yData. 4. total number of parameters for model in "totparms". Returns termcode = FAIL if current parm value is not an approximate critical point = METGRADTOL if norm of scaled gradient less than gradtol = METSTEPTOL if scaled step is less than steptol = LINEFAIL if linesearch failed to find next parm distinct from current value = EXCEDITNLIM if iteratation limit exceeded = NETSAT if network is possibly oversaturated */ do_quasiGaussNewton() { int i; int itncount = 0; /* start iteration count at zero */ int termcode = FAIL; /* termcode must be changed to nonzero in order to terminate */ int retcode; int sing; /* flag for singularity of Jacobian */ double SS = 0; while(termcode == FAIL) { itncount += 1; /* increment iteration count */ ClearDTypeMatrix(&Jac_T); SS = Compute_SS_Jac(); /* this also computes the Jacobian, stored as transpose */ AppendResidZeros(); /* append "totparms" zeros to Resid vector */ AppendMuIdentity(); /* append Mu times Identity matrix to end of Jacobian */ ComputegradSSforqGN(); /* Compute the gradient of the SS function = Jac_T*Resid. Should be before calculation of next step since that computation affects the values in Resid vector. */ sing = OLSbyQRmethod(&Jac_T,&Pi,&Diag,&Resid); /* Compute next step, step increment stored in Diag sing TRUE => singular Jacobian, so stop */ if(sing) termcode = NETSAT; else { SaveParms(&Pi); /* save weights in case linesearch fails,the Pi vector is not being used at this point, so is reused to save space */ if(theNet->usemaxstep == TRUE) ConstrainStep(); /* constrain step size to maxstep */ retcode = LineSearch(&SS); /* also sets the SS value */ termcode = StopYet(SS,retcode,itncount); } } /* end of while(termcode == FAIL) */ return(termcode); } /*------------------------- Append zeros to the Resid vector. */ static AppendResidZeros() { int i; DataType * v; /* pointer to elements of residual vector */ v = *Resid.cells + XData.rows; /* XData.rows is the number of observations */ for(i=0; i<totparms; i++, v++) *v = 0.0; } /*------------------------- Special function to compute gradient for quasi Gauss-Newton method. */ static ComputegradSSforqGN() { register int i; register int j; register int R; /* number of rows in matrix */ register int C; /* number of columns in matrix */ register DataType temp; /* ? this uses a floating pt register on 68881 */ register DataType * y; /* following declarations use up the 3 address registers */ register DataType * row; register DataType * vec; R = Jac_T.rows; C = Jac_T.cols; vec = *Resid.cells; y = *gradSS.cells; for(i=0; i<R; y++, i++) { row = *Jac_T.cells + i*C; temp = (*row)*(*vec); for(j=1; j<XData.rows; j++) /* use XData.rows here instead of Jac_T.cols */ temp += (*(row+j))*(*(vec+j)); *y = temp; } } /*------------------------- Append a matrix Mu times the Identity matrix to the Jacobian. */ static AppendMuIdentity() { register int i,j; register DataType * jcell; /* pointer to cells of Jacobian matrix, stored as transpose */ register DataType MuI = .001; /* In the case of quasi Gauss-Newton method the Jacobian has appended to it a (totparms x totparms) Identity matrix. Thus, *Jac_T.cells + XData.rows + i*Jac_T.cols points to the first element of the ith row of this identity matrix, which is the row i, column (XData.row +1) of the Jac_T matrix. */ for(i=0; i<totparms; i++) { jcell = *Jac_T.cells + XData.rows + i*Jac_T.cols; for(j=0; j<i; j++, jcell++) *jcell = 0.0; *jcell = MuI; j++ , jcell++; for(; j<totparms; j++, jcell++) *jcell = 0.0; } } /*---------------------- Constrain the step of a Gauss-Newton or quasi Gauss-Newton method. */ static ConstrainStep() { register int i; register int N = Diag.rows; register double snorm = 0.0; register double pnorm = 0.0; register DataType * cell; register double K; cell = *Diag.cells; for(i=0; i<N; i++, cell++) snorm += pow((double)*cell,2.0); cell = *Pi.cells; for(i=0; i<N; i++, cell++) pnorm += pow((double)*cell,2.0); K = (theNet->maxrelstep*pnorm/snorm) + (theNet->maxstep/sqrt(snorm)); cell = *Diag.cells; for(i=0; i<N; i++, cell++) *cell = (*cell)*K; } /*----------------------- Alg A6.3.1 of Dennis and Shanabel. Expects full Gauss-Newton step in vector Diag. Must set the SS value. Returns retcode = LINEOK if satisfactory new parm value found = LINENOTOK if can't find a step small enough to reduce SS */ static LineSearch(ss_) double * ss_; /* pointer to value of Sum of Squares */ { double ss; /* value of Sum of Squares */ double minlambda; /* min lambda step allowed, smaller than this and would meet stop criteria anyway */ double initslope; /* initial slope of SS function */ double lambda = 1.0; /* step size, start off with full step */ int retcode = 2; /* return code for search */ double a = .0001; /* constant used in step acceptance test, called alpha in source, this value recommended by Dennis and Shanabel */ initslope = -Vec_by_Vec(&gradSS,&Diag); /* Compute the initial slope of SS function, the Diag function contains the negative of the next step, so to get the required negative initial slope would need to multiply elements of Diag by -1, instead put a negative sign in front of the calculation */ minlambda = Compute_minlambda(); /* Compute the minimum lambda */ SaveParms(&Pi); /* the Pi vector is not being used at this point, so is reused to save space */ while(retcode > 1) { UpdateParms(lambda); ss = Compute_SS(); if(ss < *ss_ + a*lambda*initslope) /* true => satisfactory new parm value found */ { retcode = LINEOK; *ss_ = ss; /* reset the Sum of Squares value before exiting */ } else if(lambda < minlambda) { retcode = LINENOTOK; RestoreParms(&Pi); /* restore previous iteration's weight values for network don't need to reset SS since minimization can't continue */ } else lambda = .1*lambda; } return(retcode); } /*----------------------- Update the weight parameters by lambda times the step given in vector Diag. Expects old weight values to be in the vector Pi. */ static UpdateParms(lambda) DataType lambda; { int j,k,N; DataType * p; /* pointer to step value */ DataType * w_; /* pointer to previous weight values */ DataType * w; /* pointer to weight matrix */ p = *Diag.cells; w_ = *Pi.cells; for(j=0; j<theNet->OutLayer; j++) { N = (theNet->W[j].rows)*(theNet->W[j].cols); w = *theNet->W[j].cells; for(k=0; k<N; k++, w++, w_++, p++) *w = *w_ - lambda*(*p); } } /*------------------------- Determine if should stop searching for minimum. Expects step in vector Diag. Returns termcode = FAIL if current parm value is not an approximate critical point = METGRADTOL if norm of scaled gradient less than gradtol = METSTEPTOL if scaled step is less than steptol = LINEFAIL if linesearch failed to find next parm distinct from current value = EXCEDITNLIM if iteratation limit exceeded */ static StopYet(ss,retcode,itncount) double ss; /* the Sum of Squares value should be set by LineSearch() */ int retcode; /* return code from LineSearch() */ int itncount; { int j,k,N; DataType * v; /* pointer to cells in vectors gradSS or Diag*/ DataType * w; /* pointer to cell of weight matrix */ double rel = 0.0; /* hold value of relative gradient */ double max = 0.0; /* hold maximum relative gradient value */ int termcode = FAIL; if (retcode == LINENOTOK) termcode = termcode | LINEFAIL; /*---- First check for zero gradient ----*/ v = *gradSS.cells; for(j=0; j<theNet->OutLayer; j++) { N = (theNet->W[j].rows)*(theNet->W[j].cols); w = *theNet->W[j].cells; for(k=0; k<N; k++, v++, w++) { rel = fabs((double)((*v))*(fabs((double)(*w))/ss)); /* don't need abs(ss) since ss is always positive */ max = (max < rel) ? rel : max; } } if (max < theNet->gradtol) termcode =(termcode | METGRADTOL); /*---- Second check for zero step ----*/ v = *Pi.cells; max = 0.0; for(j=0; j<theNet->OutLayer; j++) { N = (theNet->W[j].rows)*(theNet->W[j].cols); w = *theNet->W[j].cells; for(k=0; k<N; k++, v++, w++) { rel = fabs(((double)(*v-*w))/((double)(*w))); /* don't need abs(ss) since ss is always positive */ max = (max < rel) ? rel : max; } } if (max < theNet->steptol) termcode =(termcode | METSTEPTOL); if (itncount > theNet->itnlimit) termcode = (termcode | EXCEDITNLIM); return(termcode); } /*----------------------- Compute the minimum lambda allowed for line search. Any lambda value lower than this value would meet the stop criteria for minimum step anyway. */ static double Compute_minlambda() { int j,k,N; DataType * p; /* pointer to cells in Diag, the proposed step size */ DataType * w; /* pointer to cell of weight matrix */ double rellength = 0.0; /* hold kth value of relative gradient */ double maxrel = 0.0; /* hold maximum relative gradient value */ p = *Diag.cells; for(j=0; j<theNet->OutLayer; j++) { N = (theNet->W[j].rows)*(theNet->W[j].cols); w = *theNet->W[j].cells; for(k=0; k<N; k++, p++, w++) { rellength = fabs((double)((*p)/(*w))); if (maxrel < rellength) maxrel = rellength; } } rellength = theNet->steptol/maxrel; /* just using rellength to calculate return value */ return(rellength); } /*---------------------- Allot memory for data structures used by the Jacobian matrix and other structures used in minimization. Physical storage of Jacobian is as the transpose. Requires # observations from the data structure. Since is always run before method, also sets the totparms variable. */ AllotGradientWorkSpace() { int i; int mxprms = 1; /* keep track of layer with largest number of parameters */ totparms = 0; for(i=0; i<theNet->OutLayer; i++) { totparms += (theNet->Units[i+1])*(theNet->Units[i]); AllotDTypeVector(&Alpha[i], theNet->Units[i]); /* values for Unit[i] in each layer must be set prior to execution of this step. Also note that the Alpha[0] vector is not used, instead a pseudo vector is created from a row of the XData matrix. Thus, the allocation for i=0 wastes (Alpha[0].row)*sizeof(DataType) bytes. */ AllotDTypeVector(&dSquash[i], Alpha[i].rows); if(Alpha[i].rows > mxprms) mxprms = Alpha[i].rows; } AllotDTypeVector(&(Alpha[theNet->OutLayer]), 1); /* this allots space for output layer, a scaler but aesthetically pleasing */ if (theNet->method == qGaussNew) { AllotDTypeMatrix(&Jac_T, totparms, XData.rows+totparms); /* actually is the transpose of Jac */ AllotDTypeVector(&Resid,XData.rows+totparms); } else { AllotDTypeMatrix(&Jac_T,totparms,XData.rows); AllotDTypeVector(&Resid,XData.rows); } AllotDTypeVector(&Pi,totparms); AllotDTypeVector(&Diag,totparms); AllotDTypeVector(&gradSS,totparms); AllotDTypeMatrix(&Phi,1,mxprms); /* Phi and T2 are always a row vector since output layer is a scaler */ AllotDTypeMatrix(&T2,1,mxprms); /* spcify as a matrix instead of vector */ } LockGradientWorkSpace() { HLock(Resid.cells); HLock(Jac_T.cells); HLock(Pi.cells); HLock(Diag.cells); HLock(gradSS.cells); HLockNet(); } UnlockGradientWorkSpace() { HUnlock(Resid.cells); HUnlock(Jac_T.cells); HUnlock(Pi.cells); HUnlock(Diag.cells); HUnlock(gradSS.cells); HUnlockNet(); }